{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = ''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-10-22 22:31:32.568674: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2022-10-22 22:31:32.572728: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected\n",
      "2022-10-22 22:31:32.572748: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: husein-MS-7D31\n",
      "2022-10-22 22:31:32.572752: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: husein-MS-7D31\n",
      "2022-10-22 22:31:32.572813: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: Not found: was unable to find libcuda.so DSO loaded into this program\n",
      "2022-10-22 22:31:32.572836: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.141.3\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a0e15d1c00df487ea41d00b565599656",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading:   0%|          | 0.00/233M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import malaya\n",
    "\n",
    "model_en_ms = malaya.translation.en_ms.transformer(model = 'noisy-base')\n",
    "model_ms_en = malaya.translation.ms_en.transformer(model = 'noisy-base')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sacrebleu.metrics import BLEU, CHRF, TER\n",
    "\n",
    "bleu = BLEU()\n",
    "chrf = CHRF(word_order = 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6854"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from unidecode import unidecode\n",
    "import json\n",
    "\n",
    "with open('test-noisy-shuffled.json') as fopen:\n",
    "    test = fopen.read().split('\\n')\n",
    "    test = [json.loads(t) for t in test if len(t)]\n",
    "    \n",
    "len(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'src': 'These scarps were found alll over tie moon and appear to be minimally weathered, indicating vat the geologic events hart creatd them wore fairly recent.',\n",
       " 'tgt': 'Serpihan ini telah ditemui di serata permukaan bulan dan masih kurang terjejas oleh faktor persekitaran, yakni secara geologinya menunjukkan bahan tersebut adalah baharu terbentuk.',\n",
       " 'prefix': 'terjemah Inggeris ke Melayu: '}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test[0]['translation']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Selendang ini ditemui di seluruh bulan dan kelihatan tidak lapuk secara minimum, menunjukkan bahawa peristiwa geologi yang mencipta mereka adalah agak baru-baru ini.']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_en_ms.greedy_decoder([test[0]['translation']['src']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████| 6854/6854 [29:04<00:00,  3.93it/s]\n"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "results_en_ms, filtered_right_en_ms = [], []\n",
    "results_ms_en, filtered_right_ms_en = [], []\n",
    "for i in tqdm(range(len(test))):\n",
    "    t = test[i]['translation']\n",
    "    p = t['prefix']\n",
    "    s = t['src']\n",
    "    tgt = t['tgt']\n",
    "    \n",
    "    if not len(s) or not len(tgt):\n",
    "        continue\n",
    "        \n",
    "    if 'Inggeris ke Melayu' in p:\n",
    "        o = model_en_ms.greedy_decoder([s])[0]\n",
    "        results_en_ms.append(o)\n",
    "        filtered_right_en_ms.append(tgt)\n",
    "    else:\n",
    "        o = model_ms_en.greedy_decoder([s])[0]\n",
    "        results_ms_en.append(o)\n",
    "        filtered_right_ms_en.append(tgt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2937, 3917)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(results_en_ms), len(results_ms_en)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'name': 'BLEU',\n",
       "  'score': 41.8278308435666,\n",
       "  '_mean': -1.0,\n",
       "  '_ci': -1.0,\n",
       "  '_verbose': '73.1/49.7/35.3/25.4 (BP = 0.985 ratio = 0.985 hyp_len = 63506 ref_len = 64473)',\n",
       "  'bp': 0.9848884354831119,\n",
       "  'counts': [46414, 30108, 20323, 13889],\n",
       "  'totals': [63506, 60569, 57632, 54695],\n",
       "  'sys_len': 63506,\n",
       "  'ref_len': 64473,\n",
       "  'precisions': [73.08600762132711,\n",
       "   49.70859680694745,\n",
       "   35.263395335924486,\n",
       "   25.393546027973308],\n",
       "  'prec_str': '73.1/49.7/35.3/25.4',\n",
       "  'ratio': 0.9850014734850248},\n",
       " chrF2++ = 66.46)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "refs = [filtered_right_en_ms]\n",
    "sys = results_en_ms\n",
    "r = bleu.corpus_score(sys, refs)\n",
    "r.__dict__, chrf.corpus_score(sys, refs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'name': 'BLEU',\n",
       "  'score': 40.77050068227297,\n",
       "  '_mean': -1.0,\n",
       "  '_ci': -1.0,\n",
       "  '_verbose': '72.0/48.5/34.8/25.5 (BP = 0.972 ratio = 0.972 hyp_len = 90382 ref_len = 92985)',\n",
       "  'bp': 0.9716107843891983,\n",
       "  'counts': [65074, 41942, 28753, 20040],\n",
       "  'totals': [90382, 86465, 82548, 78631],\n",
       "  'sys_len': 90382,\n",
       "  'ref_len': 92985,\n",
       "  'precisions': [71.9988493284061,\n",
       "   48.5074885791939,\n",
       "   34.83185540533992,\n",
       "   25.486131423993083],\n",
       "  'prec_str': '72.0/48.5/34.8/25.5',\n",
       "  'ratio': 0.9720062375651987},\n",
       " chrF2++ = 63.68)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "refs = [filtered_right_ms_en]\n",
    "sys = results_ms_en\n",
    "r = bleu.corpus_score(sys, refs)\n",
    "r.__dict__, chrf.corpus_score(sys, refs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
